{% extends "base.html" %} {% block page_content %} Predict Variant Diagnosis Probability Using Structure, Function, and In Silico Features (KCNH2 and LQT2)

Introduction

This document describes an estimate of the positive predictive value (PPV) of variant discovery for all variants in the gene KCNH2 on long QT syndrome type 2 (LQT2). Additional details on the methods used are published Kroncke et al. 2020 PLOS Genetics and at the following website: (Penetrance Estimation Details (SCN5A)). We use observed and estimated probability of LQT2 diagnosis for all known KCNH2 variants as a way to assess the per variant PPV for variant discovery. Our objective is to develope a prior estimate of the per variant PPV on LQT2 which incorporates structure, function, and in silico predictors. We use these in silico and in vitro data to generate a Bayesian prior estimate of the per variant PPV since these data can be generated in a lab setting, unlike heterozygotes/carriers of KCNH2 variants which may or may not exist. The final posterior estimate combines this derived prior and clinically phenotyped heterozygotes/carriers.

Part 1: Calculate probability of LQT2 diagnosis and LQT2 Probability Density using Various Subsets of the Literature and Cohort Data

All Literature Variants

# mut_type has includes type and isoform for all variants in the literature and in the cohort.
mut_type <- mut_type[mut_type$mut_type == "missense",]

# Cohort carrier/heterozygote counts and variant IDs. 
# Here we select only the missense variants (in-frame indels are also included as "missense")
cohort.data <- cohort.data[cohort.data$mut_type == "missense" & cohort.data$total_carriers > 0,]

# Literature dataset where potentially overlapping carriers/heterozygotes are removed
d <- lit.nonoverlap.data[lit.nonoverlap.data$mut_type == "missense",]

# Here, heterozygotes/carriers from gnomAD are removed to test their influence on performance
# Remove comments in next two lines and complete subsequent evaluation to assess influence of gnomAD on
# calculations.
#d$total_carriers <- d$total_carriers - d$gnomAD # test influence of gnomAD
#d$unaff <- d$total_carriers - d$lqt2 # test influence of gnomAD

# set initial weighting and penetrance
d$weight = 1-1/(0.01+d$total_carriers)
d$penetrance_lqt2 <- d$lqt2/d$total_carriers
d[d$total_carriers < 1,"weight"] <- 0.000 # This is changed to "< 2" when evaluating ROC-AUC of n=1 variants from the literature

LQT2 empirical LQTS diagnosis probability prior

Use observed LQT2 diagnosis probability to calculate “LQTS probability density” as described in previous publication. Plot probability density versus residue

# Mean squared error
mse <- function(sm) {
  mean((sm$residuals)^2*(sm$weights))
}

# Weighted mean to determine LQT2 penetrance empirical prior
newdata = data.frame(wt=1)
model <- lm(penetrance_lqt2 ~ 1, data=d, weights = d$weight)
summary(model)
## 
## Call:
## lm(formula = penetrance_lqt2 ~ 1, data = d, weights = d$weight)
## 
## Weighted Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.2329 -0.1653 -0.0232  0.0763  0.7561 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   0.2332     0.0135   17.27   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2323 on 789 degrees of freedom
##   (467 observations deleted due to missingness)
p<-predict(model, newdata)
dev<- mse(model)#p*(1-p)

# Derive alpha and beta from weighted mean and MSE (estimated variance)
estBetaParams <- function(mu, var) {
  alpha <- ((1 - mu) / var - 1 / mu) * mu ^ 2
  beta <- alpha * (1 / mu - 1)
  return(params = list(alpha = alpha, beta = beta))
}

# Estimated shape parameters for LQT2 empirical prior
alpha0 = estBetaParams(p,dev)$alpha
beta0 = estBetaParams(p,dev)$beta
print(paste("alpha0 = ", alpha0, "  beta0 = ", beta0))
## [1] "alpha0 =  0.54041284982238   beta0 =  1.77720039413882"
# Bayesian LQT2 penetrance estimates from empirical priors 
# and observed affected/unaffected counts:
d$lqt2_penetranceBayesian_initial <- (alpha0 + d[,"lqt2"])/((alpha0 + beta0 + d[,"total_carriers"]))
d$lqt2_penetranceBayesian<-d$lqt2_penetranceBayesian_initial

# Plot literature observed LQT2 penetrance versus residue number
m<- d %>% 
  select(resnum, pmean = penetrance_lqt2) %>% 
  mutate(p_mean_smooth = rollmean(pmean, k=20, fill = NA))

fit <- loess(d[,"penetrance_lqt2"]~as.numeric(d[,"resnum"]), span = 0.15)
plot(d$resnum, d$penetrance_lqt2, xlab ="Residue", ylab = "LQT2 Penetrance Estimate")
xrange <- seq(min(fit$x), max(fit$x), length.out = 100)
ps <- predict(fit, xrange, se=T)
lines(xrange, ps$fit*1, lwd=5)
lines(xrange, (ps$fit+1.96*ps$se.fit)*1, lty=2, lwd=4)
lines(xrange, (ps$fit-1.96*ps$se.fit)*1, lty=2, lwd=4)

Calculate LQTS probability densities

With the updated empirical priors applied to carrier counts, calculate “LQTS probability density” as described in previous publication.

# NOTE: adjust 3rd argument given to funcdist, "d,", to d[d$total_carriers>1,] when 
# evaluating ROC of total_carriers == 1

h2.covariates[, "lqt2_dist"]<-NA
h2.covariates[, "lqt2_dist_weight"]<-NA
mut_type <- mut_type[mut_type$mut_type == "missense" & mut_type$isoform == "A",]
for(rec in 1:nrow(mut_type)){
  if (!is.na(mut_type[rec, "resnum"])) {
    l<-length(h2.covariates[h2.covariates$var == mut_type[rec, "var"],"var"])
    for (m in 1:l){
    h2.covariates[h2.covariates$var == mut_type[rec, "var"], 
                  c("lqt2_dist", "lqt2_dist_weight", "resnum")][m,] <- c(funcdist(mut_type[rec, "resnum"], mut_type[rec, "var"], d, h2dist, "penetrance_lqt2", "sigmoid", 7), mut_type[rec, "resnum"])
    }
  }
}

# Plot lqt2_dist versus residue number
h2.covariates$resnum<-as.integer(h2.covariates$resnum)
h2.covariates <- h2.covariates[order(h2.covariates$resnum),]
h2.covariates <- h2.covariates[!is.na(h2.covariates$resnum),]
m<- h2.covariates %>% 
  select(resnum, pmean = lqt2_dist) %>% 
  mutate(p_mean_smooth = rollmean(pmean, k=20, fill = NA))

fit <- loess(h2.covariates[,"lqt2_dist"]~as.numeric(h2.covariates[,"resnum"]), span = 0.15)
plot(h2.covariates$resnum, h2.covariates$lqt2_dist, xlab ="Residue", ylab = "LQT2 Penetrance Density", xlim=c(0,1160))
xrange <- seq(min(fit$x), max(fit$x), length.out = 100)
ps <- predict(fit, xrange, se=T)
lines(xrange, ps$fit*1, lwd=5)
lines(xrange, (ps$fit+1.96*ps$se.fit)*1, lty=2, lwd=4)
lines(xrange, (ps$fit-1.96*ps$se.fit)*1, lty=2, lwd=4)

Calculate Weighted Spearman Correlation Coefficients

Evaluate weighted Spearman correlations coefficients between observed LQT2 diagnosis probability in the literature and various potential predictors

# Merge "d" with full variant list and set carrier counts to 0. 
# This is done for convenience so we can estimate LQT2 diagnosis probability for all variants including 
# those witheld during model construction. This will make validation easier.
d <- merge(d, h2.covariates, all = TRUE)
d <- unique(d) 

calcPval=function(xName,yName,weightName,nPerms,new.mat2){
  # Pulls out variables

  x=new.mat2[,xName] 
  y=new.mat2[,yName] 
  w=new.mat2[,weightName]
  x2=x[!is.na(x)]
  y2=y[!is.na(x)]
  w2=w[!is.na(x)]

  # Calculate the real correlation
  realCorr=weightedCorr(x2,y2,method='spearman',weights=w2)
  # Do permutations, calculate fake correlations
  permutedCorrList=c()
  for(permNum in 1:nPerms){
    permutedX=sample(x2,length(x2),replace=FALSE)
    wCorrSim=weightedCorr(permutedX,y2,method='spearman',weights=w2)
    permutedCorrList=c(permutedCorrList,wCorrSim)
  }
  permutedCorrList2=abs(permutedCorrList)
  realCorr2=abs(realCorr)
  
  # Calculate pvalue
  summ=sum(realCorr2<permutedCorrList2)
  pValue=summ/nPerms
  return(list(realCorr,pValue,length(x2)))
}

calcAllPvals=function(yList,xList,nPerms,weightName,new.mat2){
  i=0
  resultTable=data.frame()
  for(yName in yList){
    for(xName in xList){
      i=i+1
      result=calcPval(xName,yName,weightName,nPerms,new.mat2)
      resultTable[i,'x']=xName
      resultTable[i,'y']=yName
      resultTable[i,'nPerms']=nPerms
      resultTable[i,'weightedCorr']=result[[1]]
      resultTable[i,'pValue']=result[[2]]
      resultTable[i,'n']=result[[3]]
      #print(resultTable[i,'pValue'])
    }
  }
  print(resultTable)
  return(resultTable)
}


yList=c('penetrance_lqt2')
xList=c('hm_ssPeak','hm_tailPeak','hm_vhalfact','hm_vhalfinact','hm_recovfrominact', 'hm_taudeact_fast', 
        'ht_ssPeak','ht_tailPeak','ht_vhalfact','ht_vhalfinact','ht_recovfrominact', 'ht_taudeact_fast',
        'pph2_prob', 'provean_score', "blast_pssm",
        'pamscore', 'aasimilaritymat', "lqt2_dist", "revel_score")
tmp<-d[!is.na(d$penetrance_lqt2) & !is.na(d$revel_score),]
resultTable<-calcAllPvals(yList, xList, 1000, 'weight', tmp)
##                    x               y nPerms weightedCorr pValue   n
## 1          hm_ssPeak penetrance_lqt2   1000  0.319565106  0.262  20
## 2        hm_tailPeak penetrance_lqt2   1000 -0.564758635  0.000 100
## 3        hm_vhalfact penetrance_lqt2   1000  0.147612664  0.291  68
## 4      hm_vhalfinact penetrance_lqt2   1000  0.669666858  0.000  29
## 5  hm_recovfrominact penetrance_lqt2   1000 -0.734693865  0.011  12
## 6   hm_taudeact_fast penetrance_lqt2   1000 -0.375296727  0.034  42
## 7          ht_ssPeak penetrance_lqt2   1000 -0.464626144  0.023  34
## 8        ht_tailPeak penetrance_lqt2   1000 -0.616133062  0.000  83
## 9        ht_vhalfact penetrance_lqt2   1000 -0.061932733  0.736  41
## 10     ht_vhalfinact penetrance_lqt2   1000 -0.173738913  0.454  26
## 11 ht_recovfrominact penetrance_lqt2   1000 -0.407537689  0.378   6
## 12  ht_taudeact_fast penetrance_lqt2   1000  0.009703229  0.971  14
## 13         pph2_prob penetrance_lqt2   1000  0.392919452  0.000 741
## 14     provean_score penetrance_lqt2   1000 -0.585053878  0.000 741
## 15        blast_pssm penetrance_lqt2   1000 -0.250452543  0.000 741
## 16          pamscore penetrance_lqt2   1000 -0.116758403  0.026 750
## 17   aasimilaritymat penetrance_lqt2   1000  0.018401459  0.703 750
## 18         lqt2_dist penetrance_lqt2   1000  0.583417065  0.000 748
## 19       revel_score penetrance_lqt2   1000  0.664409462  0.000 750
tmp<-d[!is.na(d$ht_tailPeak) & !is.na(d$penetrance_lqt2) & !is.na(d$revel_score),]
resultTable<-calcAllPvals(yList, xList, 1000, 'weight', tmp)
##                    x               y nPerms weightedCorr pValue  n
## 1          hm_ssPeak penetrance_lqt2   1000  0.690504615  0.088 10
## 2        hm_tailPeak penetrance_lqt2   1000 -0.428867090  0.004 66
## 3        hm_vhalfact penetrance_lqt2   1000 -0.093938286  0.627 35
## 4      hm_vhalfinact penetrance_lqt2   1000  0.629956576  0.048 15
## 5  hm_recovfrominact penetrance_lqt2   1000 -0.772690799  0.286  4
## 6   hm_taudeact_fast penetrance_lqt2   1000 -0.228864954  0.405 17
## 7          ht_ssPeak penetrance_lqt2   1000 -0.464626144  0.012 34
## 8        ht_tailPeak penetrance_lqt2   1000 -0.616133062  0.000 83
## 9        ht_vhalfact penetrance_lqt2   1000 -0.061096836  0.744 40
## 10     ht_vhalfinact penetrance_lqt2   1000 -0.173348992  0.443 25
## 11 ht_recovfrominact penetrance_lqt2   1000 -0.407537689  0.402  6
## 12  ht_taudeact_fast penetrance_lqt2   1000  0.009703229  0.970 14
## 13         pph2_prob penetrance_lqt2   1000  0.332107233  0.006 81
## 14     provean_score penetrance_lqt2   1000 -0.455185289  0.000 81
## 15        blast_pssm penetrance_lqt2   1000  0.055910704  0.687 81
## 16          pamscore penetrance_lqt2   1000  0.058560727  0.628 83
## 17   aasimilaritymat penetrance_lqt2   1000  0.063049344  0.628 83
## 18         lqt2_dist penetrance_lqt2   1000  0.755839461  0.000 83
## 19       revel_score penetrance_lqt2   1000  0.612183017  0.000 83

Scale all covariates

Calculate EM priors and posteriors for all variants

Use an EM algorithm to estimate LQT2 PPV for each variant (or LQT2 diagnosis probability)

Literature Variants Where N = 1 Variants are removed

# Literature dataset where potentially overlapping carriers/heterozygotes are removed
d <- lit.nonoverlap.data[lit.nonoverlap.data$mut_type == "missense",]

# set initial weighting and penetrance
d$weight = 1-1/(0.01+d$total_carriers)
d$penetrance_lqt2 <- d$lqt2/d$total_carriers
d[d$total_carriers < 2,"weight"] <- 0.000 # This is changed to "< 2" here to evaluate ROC-AUC of n=1 variants from the literature

# set initial weighting and penetrance
d$weight = 1-1/(0.01+d$total_carriers)
d$penetrance_lqt2 <- d$lqt2/d$total_carriers
d[d$total_carriers < 1,"weight"] <- 0.000 # This is changed to "< 2" when evaluating ROC-AUC of n=1 variants from the literature

LQT2 empirical diagnosis probability prior

Use observed LQT2 diagnosis probability to calculate “LQTS probability density” as described in previous publication. Plot LQTS probability density versus residue

# Weighted mean to determine LQT2 penetrance empirical prior
newdata = data.frame(wt=1)
model <- lm(penetrance_lqt2 ~ 1, data=d, weights = d$weight)
summary(model)
## 
## Call:
## lm(formula = penetrance_lqt2 ~ 1, data = d, weights = d$weight)
## 
## Weighted Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.2329 -0.1653 -0.0232  0.0763  0.7561 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   0.2332     0.0135   17.27   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2323 on 789 degrees of freedom
##   (467 observations deleted due to missingness)
p<-predict(model, newdata)
dev<- mse(model) #p*(1-p)

# Estimated shape parameters for LQT2 empirical prior
alpha0 = estBetaParams(p,dev)$alpha
beta0 = estBetaParams(p,dev)$beta
print(paste("alpha0 = ", alpha0, "  beta0 = ", beta0))
## [1] "alpha0 =  0.54041284982238   beta0 =  1.77720039413882"
# Bayesian LQT2 penetrance estimates from empirical priors 
# and observed affected/unaffected counts:
d$lqt2_penetranceBayesian_initial <- (alpha0 + d[,"lqt2"])/((alpha0 + beta0 + d[,"total_carriers"]))
d$lqt2_penetranceBayesian<-d$lqt2_penetranceBayesian_initial

Calculate LQTS probability densities

With the updated empirical priors applied to carrier counts, calculate “LQTS probability density” as described in previous publication.

h2.covariates[, "lqt2_dist"]<-NA
h2.covariates[, "lqt2_dist_weight"]<-NA
mut_type <- mut_type[mut_type$mut_type == "missense" & mut_type$isoform == "A",]
for(rec in 1:nrow(mut_type)){
  if (!is.na(mut_type[rec, "resnum"])) {
    h2.covariates[h2.covariates$var == mut_type[rec, "var"], 
                  c("lqt2_dist", "lqt2_dist_weight", "resnum")] <- c(funcdist(mut_type[rec, "resnum"], mut_type[rec, "var"], d[d$total_carriers>1,], h2dist, "penetrance_lqt2", "sigmoid", 7), mut_type[rec, "resnum"])
  }
}

# Plot lqt2_dist versus residue number
h2.covariates$resnum<-as.integer(h2.covariates$resnum)
h2.covariates <- h2.covariates[order(h2.covariates$resnum),]
h2.covariates <- h2.covariates[!is.na(h2.covariates$resnum),]

# Merge "d" with full variant list and set carrier counts to 0. 
# This is done for convenience so  we can estimate LQT2 penetrance for all variants including 
# those witheld during model construction. This will make validation easier.
d <- merge(d, h2.covariates, all = TRUE)
d <- unique(d) 

Scale all covariates

Calculate EM priors and posteriors for all variants

Use an EM algorithm to estimate LQT2 PPV for each variant (or LQT2 diagnosis probability)

Literature and Cohort Combined (for final predictions)

# Literature dataset where potentially overlapping carriers/heterozygotes are removed
d <- lit.cohort.data[lit.cohort.data$mut_type == "missense",]

# add all possible variants
allvariants<-data.frame(unlist(h2.covariates[!is.na(h2.covariates$cardiacboost),"var"]), stringsAsFactors = F)
names(allvariants)<-"var"
allvariants$isoform<-unlist(h2.covariates[!is.na(h2.covariates$cardiacboost),"isoform"])
allvariants$mut_type<-unlist(h2.covariates[!is.na(h2.covariates$cardiacboost),"mut_type"])
a<-merge(d,allvariants,all = T)
a[is.na(a$total_carriers),"total_carriers"] <- 0
a[is.na(a$lqt2),"lqt2"] <- 0
d<-a

# set initial weighting and penetrance
d$weight = 1-1/(0.01+d$total_carriers)
d$penetrance_lqt2 <- d$lqt2/d$total_carriers
d[d$total_carriers < 1,"weight"] <- 0.000 # This is changed to "< 2" here to evaluate ROC-AUC of n=1 variants from the literature

LQT2 empirical diagnosis probability prior

Use observed LQT2 diagnosis probability to calculate “LQTS probability density” as described in previous publication. Plot diagnosis probability density versus residue

# Mean squared error
mse <- function(sm) {
  mean((sm$residuals)^2*(sm$weights))
}

# Derive alpha and beta from weighted mean and MSE (estimated variance)
estBetaParams <- function(mu, var) {
  alpha <- ((1 - mu) / var - 1 / mu) * mu ^ 2
  beta <- alpha * (1 / mu - 1)
  return(params = list(alpha = alpha, beta = beta))
}

# Weighted mean to determine LQT2 penetrance empirical prior
newdata = data.frame(wt=1)
model <- lm(penetrance_lqt2 ~ 1, data=d, weights = d$weight)
summary(model)
## 
## Call:
## lm(formula = penetrance_lqt2 ~ 1, data = d, weights = d$weight)
## 
## Weighted Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.3424 -0.2429 -0.0341  0.0654  0.6315 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.34273    0.01253   27.35   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2541 on 1017 degrees of freedom
##   (6240 observations deleted due to missingness)
p<-predict(model, newdata)
dev<- mse(model) #p*(1-p)

# Estimated shape parameters for LQT2 empirical prior
alpha0 = estBetaParams(p,dev)$alpha
beta0 = estBetaParams(p,dev)$beta
print(paste("alpha0 = ", alpha0, "  beta0 = ", beta0))
## [1] "alpha0 =  0.854290338628253   beta0 =  1.63833103141397"
# Bayesian LQT2 penetrance estimates from empirical priors 
# and observed affected/unaffected counts:
d$lqt2_penetranceBayesian_initial <- (alpha0 + d[,"lqt2"])/((alpha0 + beta0 + d[,"total_carriers"]))
d$lqt2_penetranceBayesian<-d$lqt2_penetranceBayesian_initial

Calculate LQTS probability densities and annotate function and structural location

With the updated empirical priors applied to carrier counts, calculate “LQTS probability density” as described in previous publication. !!! NOTE: since these data are truly the “best estimates” we include all variants in the calculation such that unique scores are by residue not by variant.

h2.covariates[, "lqt2_dist"]<-NA
h2.covariates[, "lqt2_dist_weight"]<-NA
ld<-0
for(rec in seq(2,1159,1)){
  #print(rec)
  ld <- funcdist(rec, "var", d[!is.na(d$total_carriers) & d$total_carriers>0 & d$isoform == "A" & d$mut_type != "nonsense",], h2dist, "penetrance_lqt2", "sigmoid", 7)
  h2.covariates[!is.na(h2.covariates$isoform) & !is.na(h2.covariates$resnum) & h2.covariates$isoform=="A" & h2.covariates$resnum == rec & !is.na(h2.covariates$var) & !is.na(h2.covariates$mut_type) & h2.covariates$mut_type == "missense","lqt2_dist"] <- ld[1]
  h2.covariates[!is.na(h2.covariates$isoform) & !is.na(h2.covariates$resnum) & h2.covariates$isoform=="A" & h2.covariates$resnum == rec & !is.na(h2.covariates$var)& !is.na(h2.covariates$mut_type) & h2.covariates$mut_type == "missense","lqt2_dist_weight"] <-ld[2] 
}

# Plot lqt2_dist versus residue number
h2.covariates$resnum<-as.integer(h2.covariates$resnum)
h2.covariates <- h2.covariates[order(h2.covariates$resnum),]
h2.covariates <- h2.covariates[!is.na(h2.covariates$resnum),]

# Merge "d" with full variant list and set carrier counts to 0. 
# This is done for convenience so  we can estimate LQT2 penetrance for all variants including 
# those witheld during model construction. This will make validation easier.
d <- merge(d, h2.covariates, all = TRUE)
d <- unique(d) 

# annotate structural location (hotspot) 
d$Structure<-NA
d[!is.na(d$lqt2_dist) & d$lqt2_dist<0.1,"Structure"]<-"Non_Hotspot"
d[!is.na(d$lqt2_dist) & d$lqt2_dist>=0.1 & d$lqt2_dist<0.4,"Structure"]<-"Mild_Hotspot"
d[!is.na(d$lqt2_dist) & d$lqt2_dist>=0.4,"Structure"]<-"Hotspot"

# annotate functional perturbation
d$Function<-NA
d[!is.na(d$ht_tailPeak) & d$ht_tailPeak<0.25,"Function"]<-"Severe Dominant LOF"
d[!is.na(d$ht_tailPeak) & d$ht_tailPeak<0.5 & d$ht_tailPeak>=0.25,"Function"]<-"Dominant LOF"
d[!is.na(d$ht_tailPeak) & d$ht_tailPeak>=0.5 & d$ht_tailPeak<0.75,"Function"]<-"LOF"
d[!is.na(d$ht_tailPeak) & d$ht_tailPeak>=0.75 & d$ht_tailPeak<1.25,"Function"]<-"Normal"
d[!is.na(d$ht_tailPeak) & d$ht_tailPeak>=1.25,"Function"]<-"GOF"

Calculate EM priors and posteriors for all variants

Use an EM algorithm to estimate LQT2 PPV for each variant (or LQT2 diagnosis probability)

LQT2 diagnosis probability in Cohort Data for validation

load("prepared_data.RData")
load("lit_all_data_checkpoint.RData")

cohort.data$weight = 1-1/(0.01+cohort.data$total_carriers)
cohort.data$penetrance_lqt2 <- cohort.data$lqt2/cohort.data$total_carriers

for (variant in cohort.data$var) {
  if (!is.na(match(variant, d$var))) {
     cohort.data[cohort.data$var == variant, c("p_mean_w","alpha", "beta", "lqt2_lit", "unaff_lit", "total_carriers_lit", "lqt2_dist")] <- d[match(variant, d$var), c("p_mean_w", "alpha", "beta", "lqt2", "unaff", "total_carriers", "lqt2_dist")]
  }
}

m<- cohort.data %>% 
  select(resnum, pmean = penetrance_lqt2) %>% 
  mutate(p_mean_smooth = rollmean(pmean, k=20, fill = NA))

fit <- loess(cohort.data[,"penetrance_lqt2"]~as.numeric(cohort.data[,"resnum"]), span = 0.15)
plot(cohort.data$resnum, cohort.data$penetrance_lqt2, xlab ="Residue", ylab = "LQT2 diagnosis probability", col = "red", pch=19)
xrange <- seq(min(fit$x), max(fit$x), length.out = 100)
ps <- predict(fit, xrange, se=T)
lines(xrange, ps$fit*1, lwd=5)
lines(xrange, (ps$fit+1.96*ps$se.fit)*1, lty=2, lwd=4)
lines(xrange, (ps$fit-1.96*ps$se.fit)*1, lty=2, lwd=4)

# Add covariates to cohort dataset
cohort.data <- merge(cohort.data, h2.covariates, all = TRUE)
cohort.data <- unique(cohort.data) 


# Save cohort data
save(cohort.data, file = "cohort_checkpoint.RData")

Part 2: Coverage plots

Bootstrap and get the coverage rate

  1. Use the observed diagnosis probability from as the TRUE diagnosis probability, generate n binomial observations

  2. Use the final EM algorithm posterior as the prior for Beta-Binomial, incorporate data from step (1), generate the posterior distribution, and get 95% credible interval.

  3. Check whether the interval cover the true diagnosis probability from Step 1.

  4. Repeat Step 1 to Step 3 N times to get the coverage rate.

Bootstrap function

BootsCoverage <- function(var,n=100,N=1000,true){
  
  # var: variant name
  # n: number of subjects in the new data
  # N: number of Bootstrap

  # extract the "true" diagnosis probability
  true.p <- d[d$var==var,true]
  
  # generate binomial data 
  event <- rbinom(N,n,true.p)

  # get the posterior credible interval
  alpha <- d$alpha[which(d$var==var)] 
  beta <- d$beta[which(d$var==var)] 

  new.alpha <- alpha + event
  new.beta <- beta + n - event

  lb <- qbeta(0.025,new.alpha,new.beta)
  ub <- qbeta(0.975,new.alpha,new.beta)

  # change lb to floor of nearest 0.1
  lb <- floor(lb*20)/20
  ub <- ceiling(ub*20)/20
  
  return(sum(lb < true.p & ub > true.p)/N) 
}

Plot coverage

Observed diagnosis probability as the “true” diagnosis probability

The coverage plot where observed diagnosis probability is the “true” diagnosis probability and one hundred new observations are added is shown below.

Part 3: Variance explained

Pearson R^2 and Spearman Rho Against EM Posterior from Cohort

# load data, literature + gnomAD + cohort
load("lit_all_data_checkpoint.RData")
load("cohort_checkpoint.RData")

calcPval=function(xName,yName,weightName,nPerms,new.mat2){
  # Pulls out variables

  x=new.mat2[,xName] 
  y=new.mat2[,yName] 
  w=new.mat2[,weightName]
  x2=x[!is.na(x)]
  y2=y[!is.na(x)]
  w2=w[!is.na(x)]

  # Calculate the real correlation
  realCorr=weightedCorr(x2,y2,method='spearman',weights=w2)
  # Do permutations, calculate fake correlations
  permutedCorrList=c()
  for(permNum in 1:nPerms){
    permutedX=sample(x2,length(x2),replace=FALSE)
    wCorrSim=weightedCorr(permutedX,y2,method='spearman',weights=w2)
    permutedCorrList=c(permutedCorrList,wCorrSim)
  }
  permutedCorrList2=abs(permutedCorrList)
  realCorr2=abs(realCorr)
  
  # Calculate pvalue
  summ=sum(realCorr2<permutedCorrList2)
  pValue=summ/nPerms
  return(list(realCorr,pValue,length(x2)))
}

calcAllPvals=function(yList,xList,nPerms,weightName,new.mat2){
  i=0
  resultTable=data.frame()
  for(yName in yList){
    for(xName in xList){
      i=i+1
      result=calcPval(xName,yName,weightName,nPerms,new.mat2)
      resultTable[i,'x']=xName
      resultTable[i,'y']=yName
      resultTable[i,'nPerms']=nPerms
      resultTable[i,'weightedCorr']=result[[1]]
      resultTable[i,'pValue']=result[[2]]
      resultTable[i,'n']=result[[3]]
      #print(resultTable[i,'pValue'])
    }
  }
  print(resultTable)
  return(resultTable)
}

# Select covariates from isoform "A" in cohort dataset
cohort.data <- cohort.data[!is.na(cohort.data$total_carriers) & cohort.data$isoform == "A" & cohort.data$mut_type == "missense",]

tmp<-cohort.data[!is.na(cohort.data$provean_score) & !is.na(cohort.data$revel_score) & !is.na(cohort.data$pamscore),]
yList=c('penetrance_lqt2')
xList=c('hm_ssPeak','hm_tailPeak','hm_vhalfact','hm_vhalfinact','hm_recovfrominact', 'hm_taudeact_fast', 
        'ht_ssPeak','ht_tailPeak','ht_vhalfact','ht_vhalfinact','ht_recovfrominact', 'ht_taudeact_fast',
        'pph2_prob', 'provean_score', "blast_pssm",
        'pamscore', 'aasimilaritymat', "lqt2_dist", "revel_score", 'p_mean_w')
resultTable<-calcAllPvals(yList, xList, 1000, 'weight', tmp)
##                    x               y nPerms weightedCorr pValue   n
## 1          hm_ssPeak penetrance_lqt2   1000  -0.13782555  0.797   8
## 2        hm_tailPeak penetrance_lqt2   1000  -0.45322142  0.007  46
## 3        hm_vhalfact penetrance_lqt2   1000   0.24454237  0.235  35
## 4      hm_vhalfinact penetrance_lqt2   1000   0.58824513  0.109  13
## 5  hm_recovfrominact penetrance_lqt2   1000  -0.01793348  0.973   8
## 6   hm_taudeact_fast penetrance_lqt2   1000   0.09551958  0.726  26
## 7          ht_ssPeak penetrance_lqt2   1000  -0.59432412  0.074  11
## 8        ht_tailPeak penetrance_lqt2   1000  -0.72598217  0.000  39
## 9        ht_vhalfact penetrance_lqt2   1000   0.04702470  0.833  22
## 10     ht_vhalfinact penetrance_lqt2   1000  -0.51551414  0.076  14
## 11 ht_recovfrominact penetrance_lqt2   1000   0.31291231  0.531   4
## 12  ht_taudeact_fast penetrance_lqt2   1000   0.40856426  0.188  13
## 13         pph2_prob penetrance_lqt2   1000   0.29038377  0.000 268
## 14     provean_score penetrance_lqt2   1000  -0.38127676  0.000 268
## 15        blast_pssm penetrance_lqt2   1000  -0.07798665  0.272 268
## 16          pamscore penetrance_lqt2   1000  -0.05859981  0.420 268
## 17   aasimilaritymat penetrance_lqt2   1000   0.12912560  0.086 268
## 18         lqt2_dist penetrance_lqt2   1000   0.46761571  0.000 268
## 19       revel_score penetrance_lqt2   1000   0.45089391  0.000 268
## 20          p_mean_w penetrance_lqt2   1000   0.51861171  0.000 268
rm(tmp)
rm(t)
i=0
tmp<-data.frame()
for (x in xList){
  i=i+2
  tmp[i-1,"Feature"]<-x
  t<-d[!is.na(d[,x]) & d$total_carriers>0,]
  t<-t[!is.na(t[,"var"]),]
  tmp[i,"Feature"]<-paste(x,"_cohort")
  t<-d[!is.na(d[,x]) & d$total_carriers>0,]
  t<-t[!is.na(t[,"var"]),]
  foo <- boot(t, function(data,indices)
  weightedCorr(t[indices,x],t$penetrance_lqt2[indices], method="spearman", weights = t$weight[indices]), R=1000)
  tmp[i-1,"Spearman"]<-foo$t0
  tmp[i-1,"Spearman_low"]<-quantile(foo$t,c(0.025,0.975), na.rm = T)[1][[1]]
  tmp[i-1,"Spearman_high"]<-quantile(foo$t,c(0.025,0.975), na.rm = T)[2][[1]]
  tmp[i-1,"n"]<-length(t[,x])
  t<-cohort.data[!is.na(cohort.data[,x]) & cohort.data$total_carriers>0,]
  t<-t[!is.na(t[,"var"]),]
  foo <- boot(t, function(data,indices)
  weightedCorr(t[indices,x],t$penetrance_lqt2[indices], method="spearman", weights = t$weight[indices]), R=1000)
  tmp[i,"Spearman"]<-foo$t0
  tmp[i,"Spearman_low"]<-quantile(foo$t,c(0.025,0.975), na.rm = T)[1][[1]]
  tmp[i,"Spearman_high"]<-quantile(foo$t,c(0.025,0.975), na.rm = T)[2][[1]]
  tmp[i,"n"]<-length(t[,x])
}

forestplot(tmp$Feature,tmp$Spearman,tmp$Spearman_low,tmp$Spearman_high)

# reset "tmp" variable
tmp<-cohort.data[!is.na(cohort.data$provean_score) & !is.na(cohort.data$revel_score) & !is.na(cohort.data$pamscore),]
# Weighted R2 between observed LQT2 penetrance and post-test probability
foo <- boot(tmp, function(data,indices)
  weightedCorr(tmp$p_mean_w[indices],tmp$penetrance_lqt2[indices], method="pearson", weights = tmp$weight[indices])^2, R=1000)
print("EM estimated LQT2 diagnosis probability versus observed cohort LQT2 diagnosis probability")
## [1] "EM estimated LQT2 diagnosis probability versus observed cohort LQT2 diagnosis probability"
foo$t0
## [1] 0.3102379
quantile(foo$t,c(0.025,0.975))
##      2.5%     97.5% 
## 0.1949797 0.4221402
model <- lm(penetrance_lqt2~lqt2_dist, data = tmp, weights = weight)
mod<-data.frame(model$fitted.values,model$model$penetrance_lqt2,model$weights)
foo <- boot(mod, function(data,indices)
  weightedCorr(mod$model.fitted.values[indices],mod$model.model.penetrance_lqt2[indices], method="pearson", weights = mod$model.weights[indices])^2, R=1000)
print("LQT2 diagnosis probability density versus observed cohort LQT2 diagnosis probability")
## [1] "LQT2 diagnosis probability density versus observed cohort LQT2 diagnosis probability"
foo$t0
## [1] 0.2386498
quantile(foo$t,c(0.025,0.975))
##      2.5%     97.5% 
## 0.1379722 0.3462039
model <- lm(penetrance_lqt2~revel_score, data = tmp, weights = weight)
mod<-data.frame(model$fitted.values,model$model$penetrance_lqt2,model$weights)
foo <- boot(mod, function(data,indices)
  weightedCorr(mod$model.fitted.values[indices],mod$model.model.penetrance_lqt2[indices], method="pearson", weights = mod$model.weights[indices])^2, R=1000)
print("REVEL score versus observed cohort LQT2 diagnosis probability")
## [1] "REVEL score versus observed cohort LQT2 diagnosis probability"
foo$t0
## [1] 0.2200169
quantile(foo$t,c(0.025,0.975))
##      2.5%     97.5% 
## 0.1273068 0.3306973
# Evaluate only variants with Heterozygous peak tail current measured.
tmp<-cohort.data[!is.na(cohort.data$ht_tailPeak),]
resultTable<-calcAllPvals(yList, xList, 1000, 'weight', tmp)
##                    x               y nPerms weightedCorr pValue  n
## 1          hm_ssPeak penetrance_lqt2   1000   0.97286492  0.304  4
## 2        hm_tailPeak penetrance_lqt2   1000  -0.25115090  0.257 28
## 3        hm_vhalfact penetrance_lqt2   1000   0.10800601  0.699 15
## 4      hm_vhalfinact penetrance_lqt2   1000  -0.58787950  0.307  6
## 5  hm_recovfrominact penetrance_lqt2   1000  -1.00000000  0.000  2
## 6   hm_taudeact_fast penetrance_lqt2   1000   0.15426767  0.809  6
## 7          ht_ssPeak penetrance_lqt2   1000  -0.66768740  0.044 12
## 8        ht_tailPeak penetrance_lqt2   1000  -0.73814391  0.000 40
## 9        ht_vhalfact penetrance_lqt2   1000   0.22559402  0.352 21
## 10     ht_vhalfinact penetrance_lqt2   1000  -0.57071390  0.042 15
## 11 ht_recovfrominact penetrance_lqt2   1000   0.31291231  0.515  4
## 12  ht_taudeact_fast penetrance_lqt2   1000   0.55832008  0.087 11
## 13         pph2_prob penetrance_lqt2   1000   0.29181628  0.095 39
## 14     provean_score penetrance_lqt2   1000  -0.31546899  0.072 39
## 15        blast_pssm penetrance_lqt2   1000   0.20233110  0.277 39
## 16          pamscore penetrance_lqt2   1000  -0.02844874  0.883 40
## 17   aasimilaritymat penetrance_lqt2   1000   0.28888518  0.087 40
## 18         lqt2_dist penetrance_lqt2   1000   0.67282397  0.000 40
## 19       revel_score penetrance_lqt2   1000   0.72321081  0.000 40
## 20          p_mean_w penetrance_lqt2   1000   0.76322146  0.000 40
foo <- boot(tmp, function(data,indices)
  weightedCorr(tmp$p_mean_w[indices],tmp$penetrance_lqt2[indices], method="pearson", weights = tmp$weight[indices])^2, R=1000)
print("EM estimated LQT2 diagnosis probability versus observed cohort LQT2 diagnosis probability")
## [1] "EM estimated LQT2 diagnosis probability versus observed cohort LQT2 diagnosis probability"
foo$t0
## [1] 0.5669383
quantile(foo$t,c(0.025,0.975))
##      2.5%     97.5% 
## 0.3108545 0.7676876
model <- lm(penetrance_lqt2~ht_tailPeak, data = tmp, weights = weight)
mod<-data.frame(model$fitted.values,model$model$penetrance_lqt2,model$weights)
foo <- boot(mod, function(data,indices)
  weightedCorr(mod$model.fitted.values[indices],mod$model.model.penetrance_lqt2[indices], method="pearson", weights = mod$model.weights[indices])^2, R=1000)
print("Heterozygously measured peak tail current versus observed cohort LQT2 diagnosis probability")
## [1] "Heterozygously measured peak tail current versus observed cohort LQT2 diagnosis probability"
foo$t0
## [1] 0.4873147
quantile(foo$t,c(0.025,0.975))
##      2.5%     97.5% 
## 0.2911190 0.6737717
model <- lm(penetrance_lqt2~lqt2_dist, data = tmp, weights = weight)
mod<-data.frame(model$fitted.values,model$model$penetrance_lqt2,model$weights)
foo <- boot(mod, function(data,indices)
  weightedCorr(mod$model.fitted.values[indices],mod$model.model.penetrance_lqt2[indices], method="pearson", weights = mod$model.weights[indices])^2, R=1000)
print("LQT2 probability density versus observed cohort LQT2 diagnosis probability")
## [1] "LQT2 probability density versus observed cohort LQT2 diagnosis probability"
foo$t0
## [1] 0.4168252
quantile(foo$t,c(0.025,0.975))
##      2.5%     97.5% 
## 0.1532764 0.6646178
model <- lm(penetrance_lqt2~revel_score, data = tmp, weights = weight)
mod<-data.frame(model$fitted.values,model$model$penetrance_lqt2,model$weights)
foo <- boot(mod, function(data,indices)
  weightedCorr(mod$model.fitted.values[indices],mod$model.model.penetrance_lqt2[indices], method="pearson", weights = mod$model.weights[indices])^2, R=1000)
print("REVEL score versus observed cohort LQT2 diagnosis probability")
## [1] "REVEL score versus observed cohort LQT2 diagnosis probability"
foo$t0
## [1] 0.4662318
quantile(foo$t,c(0.025,0.975))
##      2.5%     97.5% 
## 0.2608208 0.7180945

Variance explained from literature dataset

load("lit_all_data_checkpoint.RData")

tmp<-d[!is.na(d$provean_score) & !is.na(d$penetrance_lqt2) & !is.na(d$lqt2_dist) & !is.na(d$revel_score),]

foo <- boot(tmp, function(data,indices)
  weightedCorr(tmp$p_mean_w[indices],tmp$penetrance_lqt2[indices], method="pearson", weights = tmp$weight[indices])^2, R=1000)
print("EM estimated LQT2 diagnosis probability versus observed literature LQT2 diagnosis probability")
## [1] "EM estimated LQT2 diagnosis probability versus observed literature LQT2 diagnosis probability"
foo$t0
## [1] 0.8168311
quantile(foo$t,c(0.025,0.975))
##      2.5%     97.5% 
## 0.7699019 0.8555075
model <- lm(penetrance_lqt2~lqt2_dist, data = tmp, weights = weight)
mod<-data.frame(model$fitted.values,model$model$penetrance_lqt2,model$weights)
foo <- boot(mod, function(data,indices)
  weightedCorr(mod$model.fitted.values[indices],mod$model.model.penetrance_lqt2[indices], method="pearson", weights = mod$model.weights[indices])^2, R=1000)
print("LQT2 probability density versus observed literature LQT2 diagnosis probability")
## [1] "LQT2 probability density versus observed literature LQT2 diagnosis probability"
foo$t0
## [1] 0.4902899
quantile(foo$t,c(0.025,0.975))
##      2.5%     97.5% 
## 0.3922374 0.5897895
model <- lm(penetrance_lqt2~revel_score, data = tmp, weights = weight)
mod<-data.frame(model$fitted.values,model$model$penetrance_lqt2,model$weights)
foo <- boot(mod, function(data,indices)
  weightedCorr(mod$model.fitted.values[indices],mod$model.model.penetrance_lqt2[indices], method="pearson", weights = mod$model.weights[indices])^2, R=1000)
print("REVEL versus observed literature LQT2 diagnosis probability")
## [1] "REVEL versus observed literature LQT2 diagnosis probability"
foo$t0
## [1] 0.3920345
quantile(foo$t,c(0.025,0.975))
##      2.5%     97.5% 
## 0.3285295 0.4567138
# Evaluate only variants with Heterozygous peak tail current measured.
tmp<-d[!is.na(d$ht_tailPeak) & !is.na(d$provean_score) & !is.na(d$penetrance_lqt2) & !is.nan(d$penetrance_lqt2) & !is.na(d$lqt2_dist),]

foo <- boot(tmp, function(data,indices)
  weightedCorr(tmp$p_mean_w[indices],tmp$penetrance_lqt2[indices], method="pearson", weights = tmp$weight[indices])^2, R=1000)
print("EM estimated LQT2 diagnosis probability versus observed literature LQT2 diagnosis probability")
## [1] "EM estimated LQT2 diagnosis probability versus observed literature LQT2 diagnosis probability"
foo$t0
## [1] 0.8884971
quantile(foo$t,c(0.025,0.975))
##      2.5%     97.5% 
## 0.8080615 0.9436230
model <- lm(penetrance_lqt2~lqt2_dist, data = tmp, weights = weight)
mod<-data.frame(model$fitted.values,model$model$penetrance_lqt2,model$weights)
foo <- boot(mod, function(data,indices)
  weightedCorr(mod$model.fitted.values[indices],mod$model.model.penetrance_lqt2[indices], method="pearson", weights = mod$model.weights[indices])^2, R=1000)
print("LQT2 probability density versus observed literature LQT2 diagnosis probability")
## [1] "LQT2 probability density versus observed literature LQT2 diagnosis probability"
foo$t0
## [1] 0.5945224
quantile(foo$t,c(0.025,0.975))
##      2.5%     97.5% 
## 0.4239511 0.7534652
model <- lm(penetrance_lqt2~ht_tailPeak, data = tmp, weights = weight)
mod<-data.frame(model$fitted.values,model$model$penetrance_lqt2,model$weights)
foo <- boot(mod, function(data,indices)
  weightedCorr(mod$model.fitted.values[indices],mod$model.model.penetrance_lqt2[indices], method="pearson", weights = mod$model.weights[indices])^2, R=1000)
print("Heterozygous peak tail current versus observed literature LQT2 diagnosis probability")
## [1] "Heterozygous peak tail current versus observed literature LQT2 diagnosis probability"
foo$t0
## [1] 0.3390998
quantile(foo$t,c(0.025,0.975))
##      2.5%     97.5% 
## 0.1810048 0.5319460
model <- lm(penetrance_lqt2~revel_score, data = tmp, weights = weight)
mod<-data.frame(model$fitted.values,model$model$penetrance_lqt2,model$weights)
foo <- boot(mod, function(data,indices)
  weightedCorr(mod$model.fitted.values[indices],mod$model.model.penetrance_lqt2[indices], method="pearson", weights = mod$model.weights[indices])^2, R=1000)
print("REVEL versus observed literature LQT2 diagnosis probability")
## [1] "REVEL versus observed literature LQT2 diagnosis probability"
foo$t0
## [1] 0.4286368
quantile(foo$t,c(0.025,0.975))
##      2.5%     97.5% 
## 0.2332227 0.6124817

Part 4: ROC and AUC plots

ROCs of observed cohort LQT2 diagnosis probability with 0.5 as cutoff all variants.

AUCs from ROCs observed cohort LQT2 diagnosis probability at multiple cutoffs

AUCs from ROCs predicting EM posteriors

## [1] "276   231"
## [1] "276   229"
## [1] "276   226"
## [1] "276   224"
## [1] "276   217"
## [1] "276   205"
## [1] "276   205"
## [1] "276   202"
## [1] "276   202"
## [1] "276   171"
## [1] "276   169"
## [1] "276   164"
## [1] "276   144"
## [1] "276   144"
## [1] "276   136"

ROCs of observed cohort LQT2 diagnosis probability with single observation variants.

ROCs of observed literature LQT2 diagnosis probability with 0.5 as cutoff.

AUCs from ROCs observed literature LQT2 diagnosis probability at multiple cutoffs

## [1] "741   234"
## [1] "741   231"
## [1] "741   229"
## [1] "741   223"
## [1] "741   218"
## [1] "741   211"
## [1] "741   208"
## [1] "741   205"
## [1] "741   205"
## [1] "741   193"
## [1] "741   192"
## [1] "741   190"
## [1] "741   183"
## [1] "741   179"
## [1] "741   174"

ROC and AUC for N = 1 variants from the literature.

AUC for N = 1 variants from the literature.

## [1] "362   99"
## [1] "362   99"
## [1] "362   99"
## [1] "362   99"
## [1] "362   99"
## [1] "362   99"
## [1] "362   99"
## [1] "362   99"
## [1] "362   99"
## [1] "362   99"
## [1] "362   99"
## [1] "362   99"
## [1] "362   99"
## [1] "362   99"
## [1] "362   99"

Part 5: Forest plots

The code to produce the forest plots is shown in the code chunk.

rm(d)

# Loading combined literature, gnomAD, and cohort dataset 
load("lit_all_data_checkpoint.RData")
d<-d[d$total_carriers>0 & d$mut_type!="nonsense" & d$isoform=="A",]
d<-d[!is.na(d$var),]

mean.post <- (d$alpha + d$lqt2)/(d$alpha+d$beta+d$total_carriers)
mean.prior <- (d$alpha)/(d$alpha+d$beta)

lower.prior <- qbeta(0.025,d$alpha,d$beta)
higher.prior <- qbeta(0.975,d$alpha,d$beta) 

lower.post <- qbeta(0.025,d$alpha+d$lqt2,d$beta+d$total_carriers-d$lqt2)
higher.post <- qbeta(0.975,d$alpha+d$lqt2,d$beta+d$total_carriers-d$lqt2) 

forest.data.post <- data.frame(variant = d$var, mean=mean.post,
                          lower=lower.post, higher=higher.post, resnum=d$resnum, tc=d$total_carriers, lqt2=d$lqt2)
forest.data.post$group<-"posterior"

forest.data.prior <- data.frame(variant = d$var, mean=mean.prior,
                          lower=lower.prior, higher=higher.prior, resnum=d$resnum, tc=d$total_carriers, lqt2=d$lqt2)
forest.data.prior$group<-"prior"

forest.data<-rbind(forest.data.post, forest.data.prior)
forest.data<-forest.data[order(forest.data$resnum, forest.data$variant),]
forest.data$label<-""
forest.data[forest.data$group=="posterior","label"]<-paste(forest.data[forest.data$group=="posterior","lqt2"], "/", forest.data[forest.data$group=="posterior","tc"])

#define colours for dots and bars
dotCOLS = c("#866D4B","#000000")
barCOLS = c("#FFFFFF","#FFFFFF")

plotg <- function(a,b){
  fd<-forest.data[a:b,]
  png( paste("Plots/", a, "-",b,"pics.png",sep=""),res=300,height=10,width=10,units="in")
  p<-ggplot(fd, aes(x=reorder(variant,-resnum), y=mean, ymin=lower, ymax=higher, col=group, fill=group)) + 
  geom_text(data=fd, aes(x=reorder(variant,-resnum), label=label)) +
#specify position here
  geom_linerange(size=2,position=position_dodge(width = 1)) +
  geom_hline(yintercept=1, lty=1) +
  geom_hline(yintercept=0, lty=1) +
#specify position here too
  geom_point(size=2, shape=21, colour="white", stroke = 0.5,position=position_dodge(width = 1)) +
  scale_fill_manual(values=barCOLS)+
  scale_color_manual(values=dotCOLS)+
  scale_y_continuous(name="LQT2 Diagnosis Probability", limits = c(0, 1)) +
  coord_flip() +
  theme_minimal()
  print(p)
  dev.off() 
}

sapply(0:30*50+1,function(x) plotg(x,x+49) )

The forests plots are pasted below, where gold bars are posteriors, and black bars are EM priors. Variant name is given to the left and the number of LQT2 affected / total heterozygote count is given above the dot indicating the posterior mean.

{% endblock %}